{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Time Series\n",
    "## DatetimeIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let's start by importing the packages we use in this chapter\n",
    "# and by setting the plotting backend to Plotly\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "pd.options.plotting.backend = \"plotly\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This creates a DatetimeIndex based on a start timestamp,\n",
    "# number of periods and frequency (\"D\" = daily).\n",
    "daily_index = pd.date_range(\"2020-02-28\", periods=4, freq=\"D\")\n",
    "daily_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This creates a DatetimeIndex based on start/end timestamp.\n",
    "# The frequency is set to \"weekly on Sundays\" (\"W-SUN\").\n",
    "weekly_index = pd.date_range(\"2020-01-01\", \"2020-01-31\", freq=\"W-SUN\")\n",
    "weekly_index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Construct a DataFrame based on the weekly_index. This could be\n",
    "# the visitor count of a museum that only opens on Sundays.\n",
    "pd.DataFrame(data=[21, 15, 33, 34],\n",
    "             columns=[\"visitors\"], index=weekly_index)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft = pd.read_csv(\"csv/MSFT.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.loc[:, \"Date\"] = pd.to_datetime(msft[\"Date\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.dtypes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft = pd.read_csv(\"csv/MSFT.csv\",\n",
    "                   index_col=\"Date\", parse_dates=[\"Date\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.loc[:, \"Volume\"] = msft[\"Volume\"].astype(\"float\")\n",
    "msft[\"Volume\"].dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft = msft.sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.index.date"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.loc[\"2019\", \"Adj Close\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft.loc[\"2019-06\":\"2020-05\", \"Adj Close\"].plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Working with Time Zones"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add the time information to the date\n",
    "msft_close = msft.loc[:, [\"Adj Close\"]].copy()\n",
    "msft_close.index = msft_close.index + pd.DateOffset(hours=16)\n",
    "msft_close.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Make the timestamps time-zone-aware\n",
    "msft_close = msft_close.tz_localize(\"America/New_York\")\n",
    "msft_close.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft_close = msft_close.tz_convert(\"UTC\")\n",
    "msft_close.loc[\"2020-01-02\", \"Adj Close\"]  # 21:00 without DST"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft_close.loc[\"2020-05-01\", \"Adj Close\"]  # 20:00 with DST"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Shifting and Percentage Changes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft_close.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "msft_close.shift(1).head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "returns = np.log(msft_close / msft_close.shift(1))\n",
    "returns = returns.rename(columns={\"Adj Close\": \"returns\"})\n",
    "returns.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot a histogram with the daily log returns\n",
    "returns.plot.hist()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "simple_rets = msft_close.pct_change()\n",
    "simple_rets = simple_rets.rename(columns={\"Adj Close\": \"simple rets\"})\n",
    "simple_rets.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rebasing and Correlation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "parts = []  # List to collect individual DataFrames\n",
    "for ticker in [\"AAPL\", \"AMZN\", \"GOOGL\", \"MSFT\"]:\n",
    "    # \"usecols\" allows us to only read in the Date and Adj Close\n",
    "    adj_close = pd.read_csv(f\"csv/{ticker}.csv\",\n",
    "                            index_col=\"Date\", parse_dates=[\"Date\"],\n",
    "                            usecols=[\"Date\", \"Adj Close\"])\n",
    "    # Rename the column into the ticker symbol\n",
    "    adj_close = adj_close.rename(columns={\"Adj Close\": ticker})\n",
    "    # Append the stock's DataFrame to the parts list\n",
    "    parts.append(adj_close)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Combine the 4 DataFrames into a single DataFrame\n",
    "adj_close = pd.concat(parts, axis=1)\n",
    "adj_close"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "adj_close = adj_close.dropna()\n",
    "adj_close.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Use a sample from June 2019 - May 2020\n",
    "adj_close_sample = adj_close.loc[\"2019-06\":\"2020-05\", :]\n",
    "rebased_prices = adj_close_sample / adj_close_sample.iloc[0, :] * 100\n",
    "rebased_prices.head(2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rebased_prices.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Correlation of daily log returns\n",
    "returns = np.log(adj_close / adj_close.shift(1))\n",
    "returns.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import plotly.express as px"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = px.imshow(returns.corr(),\n",
    "                x=adj_close.columns,\n",
    "                y=adj_close.columns,\n",
    "                color_continuous_scale=list(\n",
    "                    reversed(px.colors.sequential.RdBu)),\n",
    "                zmin=-1, zmax=1)\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Resampling"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "end_of_month = adj_close.resample(\"M\").last()\n",
    "end_of_month.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "end_of_month.resample(\"D\").asfreq().head()  # No transformation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "end_of_month.resample(\"W-FRI\").ffill().head()  # Forward fill"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rolling Windows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Plot the moving average for MSFT with data from 2019\n",
    "msft19 = msft.loc[\"2019\", [\"Adj Close\"]].copy()\n",
    "# Add the 25 day moving average as a new column to the DataFrame\n",
    "msft19.loc[:, \"25day average\"] = msft19[\"Adj Close\"].rolling(25).mean()\n",
    "msft19.plot()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
